########################################################################
# CreateDistanceVariables.R
# This file merges in the variables coding the distance 
# between politician home towns and each school included in
# the experiment.
#
# Note that to comply with ethics commitments, we cannot share the details
# of politician home towns or their coordinates. Since our code relies on these, 
# a replication of these variables is not possible.
# In lieu of an exact replication, we include below (commented out) the code that 
# was used to calculate the driving and walking distances between
# each school and politician home towns using the Google Maps API. 
#
# Note that the village names themselves come from self-responses by politicians.
# These responses were coded to specific locations in a partially manual process of
# matching them to village names from the census or (where that was not possible) with the aid of
# the GeoNames API (http://www.geonames.org/export/web-services.html).
#
# Please address any questions about this process to Ryan Jablonski, r.s.jablonski@lse.ac.uk
#
# Log
# Created October 2021
# Edited for APSR replication 17 August 2023 by Ryan Jablonski
########################################################################


# first merge in variables for each school included in the survey
distances=read.csv("./input/school_home_distances.csv")
all.surveys=read.csv("./output/all_withcovariates.csv", stringsAsFactors = FALSE)
all.surveys$id=as.character(paste(all.surveys$lc, all.surveys$school_id, sep="."))
distances$id=as.character(paste(distances$lc, distances$school_id, sep="."))

all.surveys.merge=merge(all.surveys, distances[,names(distances)[!names(distances) %in% c("school_id", "X", "lc")]], by="id")
#all.surveys.merge=merge(all.surveys, distances[,c(4:8)], by="id")

write.csv(all.surveys.merge,"./output/all.surveys.withgoogledistance.csv")


# do the same thing for all schools eligible for the LC survey
distances=read.csv("./input/school_home_distances_all_lc_schools.csv")
schools=read.csv("./output/Schools.forLC.withpoppoverty.csv", stringsAsFactors = FALSE)

schools.merge=merge(schools, distances[,names(distances)[!names(distances) %in% c("X", "lc")]], by="school_id")
#schools.merge=merge(schools, distances[,c(2:5)], by="school_id")

write.csv(schools.merge,"./output/Schools.forLC.withdistances.csv")



# do the same thing for all schools eligible for the MP survey
distances=read.csv("./input/school_home_distances_all_mp_schools.csv")
schools=read.csv("./output/Schools.forMP.withpoppoverty.csv", stringsAsFactors = FALSE)

schools.merge=merge(schools, distances[,names(distances)[!names(distances) %in% c("X", "lc")]], by="school_id")

write.csv(schools.merge,"./output/Schools.forMP.withdistances.csv")


########### CODE TO CREATE DISTANCE VARIABLES. FOR REFERENCE ONLY. ########
# 
# library(gmapsdistance)
# library(ggmap)
# library(geosphere)
# 
# apikey=""
# set.api.key(apikey)
# 
# all.surveys=read.csv("./output/all.surveys.withvillages.csv", stringsAsFactors = FALSE)
# 
# all.surveys$km_to_home_driving=NA
# all.surveys$km_to_home_walking=NA
# all.surveys$time_to_home_driving=NA
# all.surveys$time_to_home_walking=NA
# 
# all.surveys$row_id=c(1:nrow(all.surveys))
# 
# 
# this.df=all.surveys[is.na(all.surveys$km_to_home_driving),]
# 
# for(i in 1:nrow(this.df)){
#   # i=1
#   lat1=this.df$school_latitude[i]
#   lon1=this.df$school_longitude[i]
#   loc1=paste0(lat1,",",lon1)
#   
#   lat2=this.df$home_lat[i]
#   lon2=this.df$home_long[i]
#   loc2=paste0(lat2,",",lon2)
#   
#   
#   test=tryCatch({
#     results = gmapsdistance(origin = trimws(loc1),
#                             destination = trimws(loc2),
#                             mode = "driving", key=apikey) 
#     
#   })
#   if(results$Status=="ROUTE_NOT_FOUND"){
#     
#     this.df$km_to_home_driving[i]=  -999
#     this.df$time_to_home_driving[i] = -999
#     
#   }else{
#     
#     this.df$km_to_home_driving[i]=results$Distance/1000
#     this.df$time_to_home_driving[i]=results$Time
#     
#   }
#   
#   
#   test=tryCatch({
#     results = gmapsdistance(origin = trimws(loc1),
#                             destination = trimws(loc2),
#                             mode = "walking", key=apikey) 
#     
#   })
#   if(results$Status=="ROUTE_NOT_FOUND"){
#     
#     this.df$km_to_home_walking[i]=  -999
#     this.df$time_to_home_walking[i] = -999
#     
#   }else{
#     
#     this.df$km_to_home_walking[i]=results$Distance/1000
#     this.df$time_to_home_walking[i]=results$Time
#     
#   }
#   
# }
# 
# all.surveys$km_to_home_best=ifelse(is.na(all.surveys$km_to_home_driving) | all.surveys$km_to_home_driving<0, all.surveys$km_to_home_walking, all.surveys$km_to_home_driving)
# 
# 
# write.csv(all.surveys, "./output/schools_lc_with_googledistances.csv")

